#!/usr/bin/env python
# coding=utf-8
# __author__ = 'Yunchao Ling'

from pymongo import MongoClient
from tqdm import tqdm

# Chromosome labels accepted by this script: the strings "1" through "22".
valid_chrom = {str(number) for number in range(1, 23)}

# MongoDB connection and the four "hanvcf" collections that are counted per
# position (all / wgs / wes / array, per the collection names).
# NOTE(review): the connection URI embeds a username and password in source;
# consider loading it from configuration or the environment instead.
client = MongoClient("mongodb://han:han2019@mongo-han.handb:27017")
hanvcf_db = client["hanvcf"]
collection1 = hanvcf_db["all_20210521"]
collection2 = hanvcf_db["wgs_20210521"]
collection3 = hanvcf_db["wes_20210521"]
collection4 = hanvcf_db["array_20210521"]

# For every row of the input table whose chromosome is in ``valid_chrom``,
# count the documents matching (chrom, pos) in each of the four collections
# and write one TSV row: chrom, pos, all, wgs, wes, array.
#
# The ``with`` statement guarantees both files are closed even if a query or
# a parse error aborts the loop part-way through.
with open("variation_info_GRCh38.txt", "r") as infile, \
        open("variation_count.tsv", "w") as outfile:
    outfile.write("chrom\tpos\tall\twgs\twes\tarray\n")
    outfile.flush()
    # Discard the first input line (assumed to be a column header).
    infile.readline()
    for line in tqdm(infile):
        line = line.rstrip("\n")
        if not line:
            # A blank (e.g. trailing) line has no position column; skip it
            # instead of failing with IndexError below.
            continue
        splitline = line.split("\t")
        chrom = splitline[0]
        pos = int(splitline[1])
        if chrom in valid_chrom:
            # The query uses an integer chromosome, not the string label.
            chrom = int(chrom)
            query = {"chrom": chrom, "pos": pos}
            # Each output column must come from its own collection; the
            # previous code queried collection1 for all four columns.
            result1 = collection1.count_documents(query)
            result2 = collection2.count_documents(query)
            result3 = collection3.count_documents(query)
            result4 = collection4.count_documents(query)
            outfile.write("%d\t%d\t%d\t%d\t%d\t%d\n" % (chrom, pos, result1, result2, result3, result4))
            # Flush per row so partial results are on disk while the job is
            # still running (presumably intentional for a long-running scan).
            outfile.flush()
